Goal: Do an MDS plot using Irene’s SNPs to look for relatedness among plants
First, need to call SNPs using all BAM files at once:
working in directory 2019/IreneSnps on whitney
first, sort bams
# Sort every duplicate-removed LT BAM; each result is written to the current
# directory as <original name>_sort.bam.
for bam in SNPanalysis/*LT*rmdup.bam
do
  # If the glob matched nothing the literal pattern is passed through; skip it.
  [ -e "$bam" ] || continue
  sorted="$(basename "$bam" .bam)_sort.bam"
  # Quote paths so names containing spaces or glob characters survive intact.
  samtools sort -o "$sorted" --reference SNPanalysis/Pinsaporeference1 "$bam"
done
next, assign read groups to bams
# Build the bamaddrg argument list: one "-b <bam> -r <id> -s <id>" triple per
# sorted BAM, where <id> is the file name with the _rmdup_sort.bam suffix removed.
# Uses a bash array (not a space-joined string) so each file name stays one argument.
input=()
for f in *sort.bam   # glob directly instead of parsing `ls` output
do
  [ -e "$f" ] || continue   # no sorted BAMs present: pattern stays literal, skip it
  rg=$(basename "$f" _rmdup_sort.bam)
  input+=(-b "$f" -r "$rg" -s "$rg")
done
# Show the assembled arguments for a visual check before running.
echo "${input[@]}"
# Merge all inputs into a single tagged BAM, then index it for the variant caller.
bamaddrg "${input[@]}" > LT_rmdup_sort_combined.bam
samtools index LT_rmdup_sort_combined.bam
# Single-process freebayes run on the combined BAM with indel, MNP and complex
# calls switched off; output goes to LT.vcf and the job runs in the background.
freebayes \
  -f SNPanalysis/Pinsaporeference1 \
  --no-indels \
  --no-mnps \
  --no-complex \
  LT_rmdup_sort_combined.bam \
  > LT.vcf &
Try freebayes-parallel instead, to split the SNP calling across several cores:
# Raise this shell's open-file limit before launching the parallel run.
ulimit -n 4000
# Use the same reference path as the sorting step and the single-process
# freebayes call, so all steps work from the same directory.
# NOTE(review): the original used a bare "Pinsaporeference1" here — confirm
# that SNPanalysis/Pinsaporeference1.fai exists.
ref=SNPanalysis/Pinsaporeference1
# Write the region list (100000 is the region-size argument) from the FASTA index.
/usr/local/stow/freebayes/scripts/fasta_generate_regions.py "$ref.fai" 100000 > regions
# Run the locally edited freebayes-parallel over those regions with 8 jobs.
./freebayes-parallel regions 8 -f "$ref" --no-indels --no-mnps --no-complex LT_rmdup_sort_combined.bam > LT.vcf
(note: I edited the freebayes-parallel script so that it would work…)
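freebayes-parallel takes about 12 hours. Note that the run above writes LT.vcf, whereas the copy below expects LT.vcf.gz, so the file must be compressed first (e.g. `gzip LT.vcf`).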
# Fetch the gzipped VCF from whitney into the ../input/ directory (relative to
# the directory this command is run from).
scp whitney.plb.ucdavis.edu:2019/IreneSnps/LT.vcf.gz ../input/
library(tidyverse)
library(ggrepel)
get the vcf header
# Read the "#CHROM ..." column-header line from the gzipped VCF.
# A gz connection is read line by line and closed as soon as the header line
# is found, so the large body of the file is never decompressed and no
# external zgrep is needed. Matching on the line start avoids picking up any
# other line that merely contains "#C".
vcf_con <- gzfile("../input/LT.vcf.gz", open = "rt")
vcf.header <- character(0)
repeat {
  vcf_line <- readLines(vcf_con, n = 1L)
  # Stop at end of file or at the first non-header (data) line.
  if (length(vcf_line) == 0L || !startsWith(vcf_line, "#")) {
    break
  }
  if (startsWith(vcf_line, "#CHROM")) {
    vcf.header <- vcf_line
    break
  }
}
close(vcf_con)
# Fail loudly if the file has no column-header line.
stopifnot(length(vcf.header) == 1L)
vcf.header
[1] "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t38LTR\t42LTR\t42LTRR\t43LTR\t43LTRR\t49LTWR\t49LTWRR\t95LTWR\t95LTWRR\t99LTWR"
# Convert the raw header line into a character vector of column names.
vcf.header <- str_split(
  str_replace(vcf.header, "#", ""), # drop the first pound sign
  pattern = "\t" # fields are tab-separated
)[[1]] # take the fields of the first (only) header line
vcf.header
[1] "CHROM" "POS" "ID" "REF" "ALT" "QUAL" "FILTER" "INFO"
[9] "FORMAT" "38LTR" "42LTR" "42LTRR" "43LTR" "43LTRR" "49LTWR" "49LTWRR"
[17] "95LTWR" "95LTWRR" "99LTWR"
get the data
# Load the VCF body into a tibble, one row per variant.
# - Lines starting with "#" (the VCF header) are skipped; column names come
#   from vcf.header instead.
# - "", "NA" and "." are all treated as missing.
# - Column types are declared explicitly (POS integer, QUAL double, everything
#   else character) so parsing does not depend on type guessing.
# - progress = FALSE keeps the progress bar out of rendered/knitted output.
snps <- read_tsv(
  "../input/LT.vcf.gz",
  na = c("", "NA", "."),
  comment = "#",
  col_names = vcf.header,
  col_types = cols(
    .default = col_character(),
    POS = col_integer(),
    QUAL = col_double()
  ),
  progress = FALSE
) %>%
  select(-ID, -FILTER) # these are empty columns
Parsed with column specification:
cols(
CHROM = col_character(),
POS = col_integer(),
ID = col_character(),
REF = col_character(),
ALT = col_character(),
QUAL = col_double(),
FILTER = col_character(),
INFO = col_character(),
FORMAT = col_character(),
`38LTR` = col_character(),
`42LTR` = col_character(),
`42LTRR` = col_character(),
`43LTR` = col_character(),
`43LTRR` = col_character(),
`49LTWR` = col_character(),
`49LTWRR` = col_character(),
`95LTWR` = col_character(),
`95LTWRR` = col_character(),
`99LTWR` = col_character()
)
|=========== | 16% 66 MB
|=========== | 16% 66 MB
|=========== | 16% 66 MB
|=========== | 16% 67 MB
|=========== | 16% 67 MB
|=========== | 17% 68 MB
|============ | 17% 68 MB
|============ | 17% 68 MB
|============ | 17% 69 MB
|============ | 17% 69 MB
|============ | 17% 69 MB
|============ | 17% 70 MB
|============ | 17% 70 MB
|============ | 17% 71 MB
|============ | 17% 71 MB
|============ | 18% 71 MB
|============ | 18% 72 MB
|============ | 18% 72 MB
|============ | 18% 73 MB
|============ | 18% 73 MB
|============ | 18% 73 MB
|============= | 18% 74 MB
|============= | 18% 74 MB
|============= | 18% 74 MB
|============= | 18% 75 MB
|============= | 18% 75 MB
|============= | 19% 76 MB
|============= | 19% 76 MB
|============= | 19% 76 MB
|============= | 19% 77 MB
|============= | 19% 77 MB
|============= | 19% 78 MB
|============= | 19% 78 MB
|============= | 19% 78 MB
|============= | 19% 79 MB
|============= | 19% 79 MB
|============== | 20% 79 MB
|============== | 20% 80 MB
|============== | 20% 80 MB
|============== | 20% 81 MB
|============== | 20% 81 MB
|============== | 20% 81 MB
|============== | 20% 82 MB
|============== | 20% 82 MB
|============== | 20% 83 MB
|============== | 20% 83 MB
|============== | 21% 83 MB
|============== | 21% 84 MB
|============== | 21% 84 MB
|============== | 21% 84 MB
|============== | 21% 85 MB
|=============== | 21% 85 MB
|=============== | 21% 86 MB
|=============== | 21% 86 MB
|=============== | 21% 86 MB
|=============== | 21% 87 MB
|=============== | 21% 87 MB
|=============== | 22% 87 MB
|=============== | 22% 88 MB
|=============== | 22% 88 MB
|=============== | 22% 89 MB
|=============== | 22% 89 MB
|=============== | 22% 89 MB
|=============== | 22% 90 MB
|=============== | 22% 90 MB
|=============== | 22% 91 MB
|================ | 22% 91 MB
|================ | 23% 91 MB
|================ | 23% 92 MB
|================ | 23% 92 MB
|================ | 23% 92 MB
|================ | 23% 93 MB
|================ | 23% 93 MB
|================ | 23% 94 MB
|================ | 23% 94 MB
|================ | 23% 94 MB
|================ | 23% 95 MB
|================ | 23% 95 MB
|================ | 24% 95 MB
|================ | 24% 96 MB
|================ | 24% 96 MB
|================= | 24% 97 MB
|================= | 24% 97 MB
|================= | 24% 97 MB
|================= | 24% 98 MB
|================= | 24% 98 MB
|================= | 24% 99 MB
|================= | 24% 99 MB
|================= | 25% 99 MB
|================= | 25% 100 MB
|================= | 25% 100 MB
|================= | 25% 100 MB
|================= | 25% 101 MB
|================= | 25% 101 MB
|================= | 25% 102 MB
|================= | 25% 102 MB
|================== | 25% 102 MB
|================== | 25% 103 MB
|================== | 25% 103 MB
|================== | 26% 104 MB
|================== | 26% 104 MB
|================== | 26% 104 MB
|================== | 26% 105 MB
|================== | 26% 105 MB
|================== | 26% 105 MB
|================== | 26% 106 MB
|================== | 26% 106 MB
|================== | 26% 107 MB
|================== | 26% 107 MB
|================== | 27% 107 MB
|================== | 27% 108 MB
|=================== | 27% 108 MB
|=================== | 27% 109 MB
|=================== | 27% 109 MB
|=================== | 27% 109 MB
|=================== | 27% 110 MB
|=================== | 27% 110 MB
|=================== | 27% 110 MB
|=================== | 27% 111 MB
|=================== | 27% 111 MB
|=================== | 28% 112 MB
|=================== | 28% 112 MB
|=================== | 28% 112 MB
|=================== | 28% 113 MB
|=================== | 28% 113 MB
|==================== | 28% 114 MB
|==================== | 28% 114 MB
|==================== | 28% 114 MB
|==================== | 28% 115 MB
|==================== | 28% 115 MB
|==================== | 29% 116 MB
|==================== | 29% 116 MB
|==================== | 29% 116 MB
|==================== | 29% 117 MB
|==================== | 29% 117 MB
|==================== | 29% 117 MB
|==================== | 29% 118 MB
|==================== | 29% 118 MB
|==================== | 29% 119 MB
|==================== | 29% 119 MB
|===================== | 30% 119 MB
|===================== | 30% 120 MB
|===================== | 30% 120 MB
|===================== | 30% 121 MB
|===================== | 30% 121 MB
|===================== | 30% 121 MB
|===================== | 30% 122 MB
|===================== | 30% 122 MB
|===================== | 30% 122 MB
|===================== | 30% 123 MB
|===================== | 31% 123 MB
|===================== | 31% 124 MB
|===================== | 31% 124 MB
|===================== | 31% 124 MB
|===================== | 31% 125 MB
|====================== | 31% 125 MB
|====================== | 31% 126 MB
|====================== | 31% 126 MB
|====================== | 31% 126 MB
|====================== | 31% 127 MB
|====================== | 31% 127 MB
|====================== | 32% 128 MB
|====================== | 32% 128 MB
|====================== | 32% 128 MB
|====================== | 32% 129 MB
|====================== | 32% 129 MB
|====================== | 32% 129 MB
|====================== | 32% 130 MB
|====================== | 32% 130 MB
|======================= | 32% 131 MB
|======================= | 32% 131 MB
|======================= | 33% 131 MB
|======================= | 33% 132 MB
|======================= | 33% 132 MB
|======================= | 33% 133 MB
|======================= | 33% 133 MB
|======================= | 33% 133 MB
|======================= | 33% 134 MB
|======================= | 33% 134 MB
|======================= | 33% 135 MB
|======================= | 33% 135 MB
|======================= | 34% 135 MB
|======================= | 34% 136 MB
|======================= | 34% 136 MB
|======================== | 34% 137 MB
|======================== | 34% 137 MB
|======================== | 34% 137 MB
|======================== | 34% 138 MB
|======================== | 34% 138 MB
|======================== | 34% 139 MB
|======================== | 34% 139 MB
|======================== | 35% 139 MB
|======================== | 35% 140 MB
|======================== | 35% 140 MB
|======================== | 35% 141 MB
|======================== | 35% 141 MB
|======================== | 35% 141 MB
|======================== | 35% 142 MB
|========================= | 35% 142 MB
|========================= | 35% 142 MB
|========================= | 35% 143 MB
|========================= | 36% 143 MB
|========================= | 36% 144 MB
|========================= | 36% 144 MB
|========================= | 36% 144 MB
|========================= | 36% 145 MB
|========================= | 36% 145 MB
|========================= | 36% 146 MB
|========================= | 36% 146 MB
|========================= | 36% 146 MB
|========================= | 36% 147 MB
|========================= | 37% 147 MB
|========================= | 37% 148 MB
|========================== | 37% 148 MB
|========================== | 37% 148 MB
|========================== | 37% 149 MB
|========================== | 37% 149 MB
|========================== | 37% 150 MB
|========================== | 37% 150 MB
|========================== | 37% 150 MB
|========================== | 37% 151 MB
|========================== | 37% 151 MB
|========================== | 38% 151 MB
|========================== | 38% 152 MB
|========================== | 38% 152 MB
|========================== | 38% 153 MB
|========================== | 38% 153 MB
|========================== | 38% 153 MB
|=========================== | 38% 154 MB
|=========================== | 38% 154 MB
|=========================== | 38% 155 MB
|=========================== | 38% 155 MB
|=========================== | 39% 155 MB
|=========================== | 39% 156 MB
|=========================== | 39% 156 MB
|=========================== | 39% 157 MB
|=========================== | 39% 157 MB
|=========================== | 39% 157 MB
|=========================== | 39% 158 MB
|=========================== | 39% 158 MB
|=========================== | 39% 159 MB
|=========================== | 39% 159 MB
|============================ | 40% 159 MB
|============================ | 40% 160 MB
|============================ | 40% 160 MB
|============================ | 40% 160 MB
|============================ | 40% 161 MB
|============================ | 40% 161 MB
|============================ | 40% 162 MB
|============================ | 40% 162 MB
|============================ | 40% 162 MB
|============================ | 40% 163 MB
|============================ | 41% 163 MB
|============================ | 41% 164 MB
|============================ | 41% 164 MB
|============================ | 41% 164 MB
|============================= | 41% 165 MB
|============================= | 41% 165 MB
|============================= | 41% 166 MB
|============================= | 41% 166 MB
|============================= | 41% 166 MB
|============================= | 41% 167 MB
|============================= | 42% 167 MB
|============================= | 42% 168 MB
|============================= | 42% 168 MB
|============================= | 42% 168 MB
|============================= | 42% 169 MB
|============================= | 42% 169 MB
|============================= | 42% 170 MB
|============================= | 42% 170 MB
|============================== | 42% 171 MB
|============================== | 42% 171 MB
|============================== | 43% 171 MB
|============================== | 43% 172 MB
|============================== | 43% 172 MB
|============================== | 43% 173 MB
|============================== | 43% 173 MB
|============================== | 43% 173 MB
|============================== | 43% 174 MB
|============================== | 43% 174 MB
|============================== | 43% 175 MB
|============================== | 44% 175 MB
|============================== | 44% 175 MB
|============================== | 44% 176 MB
|=============================== | 44% 176 MB
|=============================== | 44% 177 MB
|=============================== | 44% 177 MB
|=============================== | 44% 177 MB
|=============================== | 44% 178 MB
|=============================== | 44% 178 MB
|=============================== | 44% 179 MB
|=============================== | 44% 179 MB
|=============================== | 45% 179 MB
|=============================== | 45% 180 MB
|=============================== | 45% 180 MB
|=============================== | 45% 181 MB
|=============================== | 45% 181 MB
|=============================== | 45% 181 MB
|=============================== | 45% 182 MB
|================================ | 45% 182 MB
|================================ | 45% 183 MB
|================================ | 46% 183 MB
|================================ | 46% 183 MB
|================================ | 46% 184 MB
|================================ | 46% 184 MB
|================================ | 46% 185 MB
|================================ | 46% 185 MB
|================================ | 46% 185 MB
|================================ | 46% 186 MB
|================================ | 46% 186 MB
|================================ | 46% 187 MB
|================================ | 46% 187 MB
|================================ | 47% 187 MB
|================================= | 47% 188 MB
|================================= | 47% 188 MB
|================================= | 47% 188 MB
|================================= | 47% 189 MB
|================================= | 47% 189 MB
|================================= | 47% 190 MB
|================================= | 47% 190 MB
|================================= | 47% 190 MB
|================================= | 47% 191 MB
|================================= | 48% 191 MB
|================================= | 48% 192 MB
|================================= | 48% 192 MB
|================================= | 48% 193 MB
|================================= | 48% 193 MB
|================================= | 48% 193 MB
|================================== | 48% 194 MB
|================================== | 48% 194 MB
|================================== | 48% 194 MB
|================================== | 48% 195 MB
|================================== | 49% 195 MB
|================================== | 49% 196 MB
|================================== | 49% 196 MB
|================================== | 49% 196 MB
|================================== | 49% 197 MB
|================================== | 49% 197 MB
|================================== | 49% 198 MB
|================================== | 49% 198 MB
|================================== | 49% 199 MB
|================================== | 49% 199 MB
|=================================== | 50% 199 MB
|=================================== | 50% 200 MB
|=================================== | 50% 200 MB
|=================================== | 50% 201 MB
|=================================== | 50% 201 MB
|=================================== | 50% 201 MB
|=================================== | 50% 202 MB
|=================================== | 50% 202 MB
|=================================== | 50% 203 MB
|=================================== | 50% 203 MB
|=================================== | 51% 203 MB
|=================================== | 51% 204 MB
|=================================== | 51% 204 MB
|=================================== | 51% 204 MB
|==================================== | 51% 205 MB
|==================================== | 51% 205 MB
|==================================== | 51% 206 MB
|==================================== | 51% 206 MB
|==================================== | 51% 206 MB
|==================================== | 51% 207 MB
|==================================== | 52% 207 MB
|==================================== | 52% 208 MB
|==================================== | 52% 208 MB
|==================================== | 52% 208 MB
|==================================== | 52% 209 MB
|==================================== | 52% 209 MB
|==================================== | 52% 210 MB
|==================================== | 52% 210 MB
|==================================== | 52% 210 MB
|===================================== | 52% 211 MB
|===================================== | 53% 211 MB
|===================================== | 53% 212 MB
|===================================== | 53% 212 MB
|===================================== | 53% 212 MB
|===================================== | 53% 213 MB
|===================================== | 53% 213 MB
|===================================== | 53% 213 MB
|===================================== | 53% 214 MB
|===================================== | 53% 214 MB
|===================================== | 53% 215 MB
|===================================== | 54% 215 MB
|===================================== | 54% 216 MB
|===================================== | 54% 216 MB
|====================================== | 54% 216 MB
|====================================== | 54% 217 MB
|====================================== | 54% 217 MB
|====================================== | 54% 217 MB
|====================================== | 54% 218 MB
|====================================== | 54% 218 MB
|====================================== | 54% 219 MB
|====================================== | 55% 219 MB
|====================================== | 55% 219 MB
|====================================== | 55% 220 MB
|====================================== | 55% 220 MB
|====================================== | 55% 221 MB
|====================================== | 55% 221 MB
|====================================== | 55% 221 MB
|====================================== | 55% 222 MB
|======================================= | 55% 222 MB
|======================================= | 55% 223 MB
|======================================= | 55% 223 MB
|======================================= | 56% 223 MB
|======================================= | 56% 224 MB
|======================================= | 56% 224 MB
|======================================= | 56% 224 MB
|======================================= | 56% 225 MB
|======================================= | 56% 225 MB
|======================================= | 56% 226 MB
|======================================= | 56% 226 MB
|======================================= | 56% 226 MB
|======================================= | 56% 227 MB
|======================================= | 57% 227 MB
|======================================== | 57% 228 MB
|======================================== | 57% 228 MB
|======================================== | 57% 228 MB
|======================================== | 57% 229 MB
|======================================== | 57% 229 MB
|======================================== | 57% 230 MB
|======================================== | 57% 230 MB
|======================================== | 57% 230 MB
|======================================== | 57% 231 MB
|======================================== | 58% 231 MB
|======================================== | 58% 232 MB
|======================================== | 58% 232 MB
|======================================== | 58% 232 MB
|======================================== | 58% 233 MB
|======================================== | 58% 233 MB
|========================================= | 58% 233 MB
|========================================= | 58% 234 MB
|========================================= | 58% 234 MB
|========================================= | 58% 235 MB
|========================================= | 59% 235 MB
|========================================= | 59% 235 MB
|========================================= | 59% 236 MB
|========================================= | 59% 236 MB
|========================================= | 59% 237 MB
|========================================= | 59% 237 MB
|========================================= | 59% 237 MB
|========================================= | 59% 238 MB
|========================================= | 59% 238 MB
|========================================= | 59% 238 MB
|========================================= | 59% 239 MB
|========================================== | 60% 239 MB
|========================================== | 60% 240 MB
|========================================== | 60% 240 MB
|========================================== | 60% 240 MB
|========================================== | 60% 241 MB
|========================================== | 60% 241 MB
|========================================== | 60% 242 MB
|========================================== | 60% 242 MB
|========================================== | 60% 242 MB
|========================================== | 60% 243 MB
|========================================== | 61% 243 MB
|========================================== | 61% 244 MB
|========================================== | 61% 244 MB
|========================================== | 61% 244 MB
|=========================================== | 61% 245 MB
|=========================================== | 61% 245 MB
|=========================================== | 61% 246 MB
|=========================================== | 61% 246 MB
|=========================================== | 61% 246 MB
|=========================================== | 61% 247 MB
|=========================================== | 62% 247 MB
|=========================================== | 62% 248 MB
|=========================================== | 62% 248 MB
|=========================================== | 62% 248 MB
|=========================================== | 62% 249 MB
|=========================================== | 62% 249 MB
|=========================================== | 62% 249 MB
|=========================================== | 62% 250 MB
|=========================================== | 62% 250 MB
|============================================ | 62% 251 MB
|============================================ | 63% 251 MB
|============================================ | 63% 251 MB
|============================================ | 63% 252 MB
|============================================ | 63% 252 MB
|============================================ | 63% 253 MB
|============================================ | 63% 253 MB
|============================================ | 63% 253 MB
|============================================ | 63% 254 MB
|============================================ | 63% 254 MB
|============================================ | 63% 255 MB
|============================================ | 64% 255 MB
|============================================ | 64% 255 MB
|============================================ | 64% 256 MB
|============================================= | 64% 256 MB
|============================================= | 64% 256 MB
|============================================= | 64% 257 MB
|============================================= | 64% 257 MB
|============================================= | 64% 258 MB
|============================================= | 64% 258 MB
|============================================= | 64% 258 MB
|============================================= | 64% 259 MB
|============================================= | 65% 259 MB
|============================================= | 65% 260 MB
|============================================= | 65% 260 MB
|============================================= | 65% 260 MB
|============================================= | 65% 261 MB
|============================================= | 65% 261 MB
|============================================= | 65% 262 MB
|============================================== | 65% 262 MB
|============================================== | 65% 262 MB
|============================================== | 65% 263 MB
|============================================== | 66% 263 MB
|============================================== | 66% 264 MB
|============================================== | 66% 264 MB
|============================================== | 66% 264 MB
|============================================== | 66% 265 MB
|============================================== | 66% 265 MB
|============================================== | 66% 265 MB
|============================================== | 66% 266 MB
|============================================== | 66% 266 MB
|============================================== | 66% 267 MB
|============================================== | 67% 267 MB
|=============================================== | 67% 267 MB
|=============================================== | 67% 268 MB
|=============================================== | 67% 268 MB
|=============================================== | 67% 269 MB
|=============================================== | 67% 269 MB
|=============================================== | 67% 269 MB
|=============================================== | 67% 270 MB
|=============================================== | 67% 270 MB
|=============================================== | 67% 271 MB
|=============================================== | 68% 271 MB
|=============================================== | 68% 271 MB
|=============================================== | 68% 272 MB
|=============================================== | 68% 272 MB
|=============================================== | 68% 273 MB
|=============================================== | 68% 273 MB
|================================================ | 68% 273 MB
|================================================ | 68% 274 MB
|================================================ | 68% 274 MB
|================================================ | 68% 274 MB
|================================================ | 69% 275 MB
|================================================ | 69% 275 MB
|================================================ | 69% 276 MB
|================================================ | 69% 276 MB
|================================================ | 69% 277 MB
|================================================ | 69% 277 MB
|================================================ | 69% 277 MB
|================================================ | 69% 278 MB
|================================================ | 69% 278 MB
|================================================ | 69% 279 MB
|================================================= | 70% 279 MB
|================================================= | 70% 279 MB
|================================================= | 70% 280 MB
|================================================= | 70% 280 MB
|================================================= | 70% 280 MB
|================================================= | 70% 281 MB
|================================================= | 70% 281 MB
|================================================= | 70% 282 MB
|================================================= | 70% 282 MB
|================================================= | 70% 282 MB
|================================================= | 71% 283 MB
|================================================= | 71% 283 MB
|================================================= | 71% 284 MB
|================================================= | 71% 284 MB
|================================================= | 71% 284 MB
|================================================== | 71% 285 MB
|================================================== | 71% 285 MB
|================================================== | 71% 285 MB
|================================================== | 71% 286 MB
|================================================== | 71% 286 MB
|================================================== | 71% 287 MB
|================================================== | 72% 287 MB
|================================================== | 72% 287 MB
|================================================== | 72% 288 MB
|================================================== | 72% 288 MB
|================================================== | 72% 289 MB
|================================================== | 72% 289 MB
|================================================== | 72% 289 MB
|================================================== | 72% 290 MB
|================================================== | 72% 290 MB
|=================================================== | 72% 290 MB
|=================================================== | 73% 291 MB
|=================================================== | 73% 291 MB
|=================================================== | 73% 292 MB
|=================================================== | 73% 292 MB
|=================================================== | 73% 292 MB
|=================================================== | 73% 293 MB
|=================================================== | 73% 293 MB
|=================================================== | 73% 294 MB
|=================================================== | 73% 294 MB
|=================================================== | 73% 294 MB
|=================================================== | 74% 295 MB
|=================================================== | 74% 295 MB
|=================================================== | 74% 296 MB
|==================================================== | 74% 296 MB
|==================================================== | 74% 296 MB
|==================================================== | 74% 297 MB
|==================================================== | 74% 297 MB
|==================================================== | 74% 298 MB
|==================================================== | 74% 298 MB
|==================================================== | 74% 298 MB
|==================================================== | 74% 299 MB
|==================================================== | 75% 299 MB
|==================================================== | 75% 299 MB
|==================================================== | 75% 300 MB
|==================================================== | 75% 300 MB
|==================================================== | 75% 301 MB
|==================================================== | 75% 301 MB
|==================================================== | 75% 301 MB
|===================================================== | 75% 302 MB
|===================================================== | 75% 302 MB
|===================================================== | 75% 303 MB
|===================================================== | 76% 303 MB
|===================================================== | 76% 303 MB
|===================================================== | 76% 304 MB
|===================================================== | 76% 304 MB
|===================================================== | 76% 304 MB
|===================================================== | 76% 305 MB
|===================================================== | 76% 305 MB
|===================================================== | 76% 306 MB
|===================================================== | 76% 306 MB
|===================================================== | 76% 306 MB
|===================================================== | 77% 307 MB
|===================================================== | 77% 307 MB
|====================================================== | 77% 308 MB
|====================================================== | 77% 308 MB
|====================================================== | 77% 308 MB
|====================================================== | 77% 309 MB
|====================================================== | 77% 309 MB
|====================================================== | 77% 310 MB
|====================================================== | 77% 310 MB
|====================================================== | 77% 310 MB
|====================================================== | 78% 311 MB
|====================================================== | 78% 311 MB
|====================================================== | 78% 312 MB
|====================================================== | 78% 312 MB
|====================================================== | 78% 312 MB
|====================================================== | 78% 313 MB
|======================================================= | 78% 313 MB
|======================================================= | 78% 313 MB
|======================================================= | 78% 314 MB
|======================================================= | 78% 314 MB
|======================================================= | 78% 315 MB
|======================================================= | 79% 315 MB
|======================================================= | 79% 315 MB
|======================================================= | 79% 316 MB
|======================================================= | 79% 316 MB
|======================================================= | 79% 317 MB
|======================================================= | 79% 317 MB
|======================================================= | 79% 317 MB
|======================================================= | 79% 318 MB
|======================================================= | 79% 318 MB
|======================================================= | 79% 319 MB
|======================================================== | 80% 319 MB
|======================================================== | 80% 319 MB
|======================================================== | 80% 320 MB
|======================================================== | 80% 320 MB
|======================================================== | 80% 320 MB
|======================================================== | 80% 321 MB
|======================================================== | 80% 321 MB
|======================================================== | 80% 322 MB
|======================================================== | 80% 322 MB
|======================================================== | 80% 322 MB
|======================================================== | 81% 323 MB
|======================================================== | 81% 323 MB
|======================================================== | 81% 324 MB
|======================================================== | 81% 324 MB
|======================================================== | 81% 324 MB
|========================================================= | 81% 325 MB
|========================================================= | 81% 325 MB
|========================================================= | 81% 326 MB
|========================================================= | 81% 326 MB
|========================================================= | 81% 326 MB
|========================================================= | 82% 327 MB
|========================================================= | 82% 327 MB
|========================================================= | 82% 328 MB
|========================================================= | 82% 328 MB
|========================================================= | 82% 328 MB
|========================================================= | 82% 329 MB
|========================================================= | 82% 329 MB
|========================================================= | 82% 330 MB
|========================================================= | 82% 330 MB
|========================================================== | 82% 330 MB
|========================================================== | 83% 331 MB
|========================================================== | 83% 331 MB
|========================================================== | 83% 332 MB
|========================================================== | 83% 332 MB
|========================================================== | 83% 332 MB
|========================================================== | 83% 333 MB
|========================================================== | 83% 333 MB
|========================================================== | 83% 333 MB
|========================================================== | 83% 334 MB
|========================================================== | 83% 334 MB
|========================================================== | 83% 335 MB
|========================================================== | 84% 335 MB
|========================================================== | 84% 335 MB
|========================================================== | 84% 336 MB
|=========================================================== | 84% 336 MB
|=========================================================== | 84% 337 MB
|=========================================================== | 84% 337 MB
|=========================================================== | 84% 337 MB
|=========================================================== | 84% 338 MB
|=========================================================== | 84% 338 MB
|=========================================================== | 84% 339 MB
|=========================================================== | 85% 339 MB
|=========================================================== | 85% 339 MB
|=========================================================== | 85% 340 MB
|=========================================================== | 85% 340 MB
|=========================================================== | 85% 341 MB
|=========================================================== | 85% 341 MB
|=========================================================== | 85% 341 MB
|============================================================ | 85% 342 MB
|============================================================ | 85% 342 MB
|============================================================ | 85% 343 MB
|============================================================ | 86% 343 MB
|============================================================ | 86% 343 MB
|============================================================ | 86% 344 MB
|============================================================ | 86% 344 MB
|============================================================ | 86% 345 MB
|============================================================ | 86% 345 MB
|============================================================ | 86% 345 MB
|============================================================ | 86% 346 MB
|============================================================ | 86% 346 MB
|============================================================ | 86% 346 MB
|============================================================ | 87% 347 MB
|============================================================= | 87% 347 MB
|============================================================= | 87% 348 MB
|============================================================= | 87% 348 MB
|============================================================= | 87% 348 MB
|============================================================= | 87% 349 MB
|============================================================= | 87% 349 MB
|============================================================= | 87% 349 MB
|============================================================= | 87% 350 MB
|============================================================= | 87% 350 MB
|============================================================= | 88% 351 MB
|============================================================= | 88% 351 MB
|============================================================= | 88% 351 MB
|============================================================= | 88% 352 MB
|============================================================= | 88% 352 MB
|============================================================= | 88% 352 MB
|============================================================= | 88% 353 MB
|============================================================== | 88% 353 MB
|============================================================== | 88% 354 MB
|============================================================== | 88% 354 MB
|============================================================== | 88% 354 MB
|============================================================== | 89% 355 MB
|============================================================== | 89% 355 MB
|============================================================== | 89% 355 MB
|============================================================== | 89% 356 MB
|============================================================== | 89% 356 MB
|============================================================== | 89% 357 MB
|============================================================== | 89% 357 MB
|============================================================== | 89% 357 MB
|============================================================== | 89% 358 MB
|============================================================== | 89% 358 MB
|============================================================== | 89% 358 MB
|=============================================================== | 90% 359 MB
|=============================================================== | 90% 359 MB
|=============================================================== | 90% 359 MB
|=============================================================== | 90% 360 MB
|=============================================================== | 90% 360 MB
|=============================================================== | 90% 360 MB
|=============================================================== | 90% 361 MB
|=============================================================== | 90% 361 MB
|=============================================================== | 90% 362 MB
|=============================================================== | 90% 362 MB
|=============================================================== | 90% 362 MB
|=============================================================== | 91% 363 MB
|=============================================================== | 91% 363 MB
|=============================================================== | 91% 364 MB
|=============================================================== | 91% 364 MB
|================================================================ | 91% 364 MB
|================================================================ | 91% 365 MB
|================================================================ | 91% 365 MB
|================================================================ | 91% 366 MB
|================================================================ | 91% 366 MB
|================================================================ | 91% 366 MB
|================================================================ | 92% 367 MB
|================================================================ | 92% 367 MB
|================================================================ | 92% 367 MB
|================================================================ | 92% 368 MB
|================================================================ | 92% 368 MB
|================================================================ | 92% 369 MB
|================================================================ | 92% 369 MB
|================================================================ | 92% 369 MB
|================================================================ | 92% 370 MB
|================================================================= | 92% 370 MB
|================================================================= | 92% 370 MB
|================================================================= | 93% 371 MB
|================================================================= | 93% 371 MB
|================================================================= | 93% 372 MB
|================================================================= | 93% 372 MB
|================================================================= | 93% 372 MB
|================================================================= | 93% 373 MB
|================================================================= | 93% 373 MB
|================================================================= | 93% 373 MB
|================================================================= | 93% 374 MB
|================================================================= | 93% 374 MB
|================================================================= | 93% 374 MB
|================================================================= | 94% 375 MB
|================================================================= | 94% 375 MB
|================================================================= | 94% 375 MB
|================================================================== | 94% 376 MB
|================================================================== | 94% 376 MB
|================================================================== | 94% 377 MB
|================================================================== | 94% 377 MB
|================================================================== | 94% 377 MB
|================================================================== | 94% 378 MB
|================================================================== | 94% 378 MB
|================================================================== | 94% 378 MB
|================================================================== | 95% 379 MB
|================================================================== | 95% 379 MB
|================================================================== | 95% 379 MB
|================================================================== | 95% 380 MB
|================================================================== | 95% 380 MB
|================================================================== | 95% 380 MB
|================================================================== | 95% 381 MB
|================================================================== | 95% 381 MB
|=================================================================== | 95% 382 MB
|=================================================================== | 95% 382 MB
|=================================================================== | 95% 382 MB
|=================================================================== | 95% 383 MB
|=================================================================== | 96% 383 MB
|=================================================================== | 96% 383 MB
|=================================================================== | 96% 384 MB
|=================================================================== | 96% 384 MB
|=================================================================== | 96% 384 MB
|=================================================================== | 96% 385 MB
|=================================================================== | 96% 385 MB
|=================================================================== | 96% 385 MB
|=================================================================== | 96% 386 MB
|=================================================================== | 96% 386 MB
|=================================================================== | 96% 386 MB
|=================================================================== | 97% 387 MB
|=================================================================== | 97% 387 MB
|==================================================================== | 97% 387 MB
|==================================================================== | 97% 388 MB
|==================================================================== | 97% 388 MB
|==================================================================== | 97% 388 MB
|==================================================================== | 97% 389 MB
|==================================================================== | 97% 389 MB
|==================================================================== | 97% 389 MB
|==================================================================== | 97% 390 MB
|==================================================================== | 97% 390 MB
|==================================================================== | 97% 390 MB
|==================================================================== | 98% 391 MB
|==================================================================== | 98% 391 MB
|==================================================================== | 98% 391 MB
|==================================================================== | 98% 392 MB
|==================================================================== | 98% 392 MB
|==================================================================== | 98% 392 MB
|==================================================================== | 98% 393 MB
|=====================================================================| 98% 393 MB
|=====================================================================| 98% 393 MB
|=====================================================================| 98% 394 MB
|=====================================================================| 98% 394 MB
|=====================================================================| 98% 394 MB
|=====================================================================| 99% 395 MB
|=====================================================================| 99% 395 MB
|=====================================================================| 99% 395 MB
|=====================================================================| 99% 396 MB
|=====================================================================| 99% 396 MB
|=====================================================================| 99% 396 MB
|=====================================================================| 99% 397 MB
|=====================================================================| 99% 397 MB
|=====================================================================| 99% 397 MB
|=====================================================================| 99% 398 MB
|=====================================================================| 99% 398 MB
|=====================================================================| 99% 398 MB
|======================================================================| 100% 398 MB
snps
filter to keep snps where there is data from all samples
# Keep only the SNP rows in which every column whose name contains a digit
# (here, the per-sample columns) is non-missing.
snps <- filter(
  snps,
  complete.cases(select(snps, matches("[0-9]")))
)
snps
# Pull the total read depth (the "DP=" entry) out of the INFO string, then keep
# SNPs with QUAL >= 100, a single-character ALT allele, and a total depth
# strictly between the 5th and 95th percentiles.
# NOTE: all conditions are deliberately kept in ONE filter() call so that both
# quantiles are computed over the same, not-yet-filtered set of rows.
snps <- snps %>%
  mutate(
    TOTAL_DEPTH = as.numeric(
      str_remove(str_extract(INFO, "DP=[0-9]*"), "DP=")
    )
  ) %>%
  filter(
    QUAL >= 100 &
      nchar(ALT) == 1 &
      TOTAL_DEPTH > quantile(TOTAL_DEPTH, 0.05) &
      TOTAL_DEPTH < quantile(TOTAL_DEPTH, 0.95)
  )
snps
Unpack the per-sample information for the different samples:
# Sample columns are the ones whose names start with a digit.
samples <- str_subset(colnames(snps), "^[0-9]")

# Split each sample's colon-delimited field into eight columns named
# <sample>_<field>, one sample at a time, threading the growing table through.
snps <- reduce(
  samples,
  function(dat, smp) {
    field.names <- paste(
      smp,
      c("gt", "tot.depth", "allele.depth", "ref.depth", "ref.qual",
        "alt.depth", "alt.qual", "gt.lik"),
      sep = "_"
    )
    separate(dat, !!smp, into = field.names, sep = ":", convert = TRUE)
  },
  .init = snps
)
snps
For the MDS we only need the genotype info
# One column per sample, holding just that sample's genotype call ("*_gt").
gts <- select(snps, ends_with("_gt"))
gts
convert this to numeric
# Convert the genotype calls to integer codes, one row per sample.
#
# The factor levels are built once from ALL samples together. Calling factor()
# on each column separately (as before) derives the levels per sample, so a
# sample that lacks one genotype class would get different integer codes for
# the same genotype string, and the distances computed below would be wrong.
# sort(unique(.)) is the same ordering factor() uses by default, so the codes
# are unchanged whenever every sample carries every genotype.
gt.levels <- sort(unique(unlist(gts, use.names = FALSE)))

geno.numeric <- gts %>%
  lapply(factor, levels = gt.levels) %>% # shared levels => comparable codes
  as.data.frame() %>%                    # reformat (prefixes names with "X")
  data.matrix() %>%                      # factors -> their integer codes
  t()                                    # samples in rows, SNPs in columns
head(geno.numeric[, 1:5], 10)
[,1] [,2] [,3] [,4] [,5]
X38LTR_gt 2 2 2 2 2
X42LTR_gt 2 3 2 2 2
X42LTRR_gt 2 2 2 2 1
X43LTR_gt 2 2 2 2 2
X43LTRR_gt 2 2 2 2 2
X49LTWR_gt 2 2 2 2 2
X49LTWRR_gt 3 3 3 3 1
X95LTWR_gt 2 3 3 2 2
X95LTWRR_gt 2 3 3 2 2
X99LTWR_gt 2 2 2 2 2
# Shape check: after the transpose there is one row per sample.
dim(geno.numeric)
[1] 10 134421
dim(gts)
[1] 134421 10
# Pairwise distances between samples (dist() defaults to Euclidean),
# expanded to a full square matrix.
genDist <- geno.numeric %>%
  dist() %>%
  as.matrix()
dim(genDist)
[1] 10 10
# Perform the multi-dimensional scaling (cmdscale() returns 2 axes by default).
mds.points <- cmdscale(genDist)
# Name the axes explicitly: as.tibble() is deprecated in favour of as_tibble(),
# and as_tibble() only falls back to V1/V2 for unnamed matrix columns through a
# legacy compatibility path that emits a warning.
colnames(mds.points) <- c("V1", "V2")

geno.mds <- as_tibble(mds.points) %>%
  mutate(
    # Tidy the sample labels: first "W" -> "N", first "RR" -> "R2", and strip
    # the "X" prefix / "_gt" suffix picked up during the numeric conversion.
    ID = rownames(geno.numeric) %>%
      str_replace("W", "N") %>%
      str_replace("RR", "R2") %>%
      str_remove_all("(X|_gt)"),
    # Labels containing "N" are tagged "no recovery", all others "recovery".
    response = ifelse(str_detect(ID, "N"), "no recovery", "recovery")
  )
geno.mds
# Plot the two MDS axes, coloured by response, with non-overlapping labels
# that are only nudged vertically.
ggplot(geno.mds, aes(x = V1, y = V2, label = ID, color = response)) +
  geom_point() +
  geom_text_repel(show.legend = FALSE, direction = "y")
Repeat the analysis without sample 38LTR: convert the remaining genotypes to numeric
# Same numeric conversion as before, but with sample 38LTR left out.
gts.subset <- gts %>%
  select(-`38LTR_gt`)

# Build the factor levels once from all retained samples. Per-column factor()
# calls would derive levels per sample, so a sample lacking one genotype class
# would get different integer codes for the same genotype string.
# sort(unique(.)) matches factor()'s default level ordering.
gt.levels <- sort(unique(unlist(gts.subset, use.names = FALSE)))

geno.numeric <- gts.subset %>%
  lapply(factor, levels = gt.levels) %>% # shared levels => comparable codes
  as.data.frame() %>%                    # reformat (prefixes names with "X")
  data.matrix() %>%                      # factors -> their integer codes
  t()                                    # samples in rows, SNPs in columns
head(geno.numeric[, 1:5], 10)
[,1] [,2] [,3] [,4] [,5]
X42LTR_gt 2 3 2 2 2
X42LTRR_gt 2 2 2 2 1
X43LTR_gt 2 2 2 2 2
X43LTRR_gt 2 2 2 2 2
X49LTWR_gt 2 2 2 2 2
X49LTWRR_gt 3 3 3 3 1
X95LTWR_gt 2 3 3 2 2
X95LTWRR_gt 2 3 3 2 2
X99LTWR_gt 2 2 2 2 2
# Shape check: one row per retained sample (gts itself still has all samples).
dim(geno.numeric)
[1] 9 134421
dim(gts)
[1] 134421 10
# Pairwise distances between samples (dist() defaults to Euclidean),
# expanded to a full square matrix.
genDist <- geno.numeric %>%
  dist() %>%
  as.matrix()
dim(genDist)
[1] 9 9
# Perform the multi-dimensional scaling (cmdscale() returns 2 axes by default).
mds.points <- cmdscale(genDist)
# Name the axes explicitly: as.tibble() is deprecated in favour of as_tibble(),
# and as_tibble() only falls back to V1/V2 for unnamed matrix columns through a
# legacy compatibility path that emits a warning.
colnames(mds.points) <- c("V1", "V2")

geno.mds <- as_tibble(mds.points) %>%
  mutate(
    # Tidy the sample labels: first "W" -> "N", first "RR" -> "R2", and strip
    # the "X" prefix / "_gt" suffix picked up during the numeric conversion.
    ID = rownames(geno.numeric) %>%
      str_replace("W", "N") %>%
      str_replace("RR", "R2") %>%
      str_remove_all("(X|_gt)"),
    # Labels containing "N" are tagged "no recovery", all others "recovery".
    response = ifelse(str_detect(ID, "N"), "no recovery", "recovery")
  )
geno.mds
# Plot the two MDS axes, coloured by response, with non-overlapping labels
# that are only nudged vertically.
ggplot(geno.mds, aes(x = V1, y = V2, label = ID, color = response)) +
  geom_point() +
  geom_text_repel(show.legend = FALSE, direction = "y")